package org.luosl.webmagicx

import java.lang.reflect.Constructor

import org.luosl.webmagicx.conf._
import org.luosl.webmagicx.handler.AbstractHandler
import org.luosl.webmagicx.pipeline.AbstractPipeline
import org.luosl.webmagicx.processor.{AbstractProcessor, GeneralProcessor}
import us.codecraft.webmagic.{Request, Spider, Task, proxy}
import us.codecraft.webmagic.scheduler.Scheduler
import org.luosl.webmagicx.ComponentType._
import org.luosl.webmagicx.urlcreator.AbstractUrlCreator
import us.codecraft.webmagic
import us.codecraft.webmagic.downloader.{AbstractDownloader, HttpClientDownloader}
import us.codecraft.webmagic.proxy.SimpleProxyProvider

import scala.collection.JavaConverters._

/**
  * Describes one configured spider: builds the processor, downloader, scheduler,
  * handlers and pipelines from the given configuration and wires them into a
  * single webmagic [[us.codecraft.webmagic.Spider]] instance.
  *
  * Note: construction eagerly instantiates every component and then calls
  * [[initSpider]] as part of the constructor body.
  *
  * @param sc spider configuration
  */
case class SpiderCell(sc:SpiderConf) {

  /** Spider id, mirrors the configuration id. */
  val id:String = sc.id
  /** Page processor. */
  val processor:AbstractProcessor = new GeneralProcessor(sc)
  /** Spider instance (UUID is the configuration id). */
  val spider:Spider = Spider.create(processor).setUUID(sc.id)
  /** Start-url creators. */
  val startUrlCreators:Seq[AbstractUrlCreator] = sc.startUrlCreators
    .map(comp => createComponent(sc, spider, Option(comp))(urlCreatorType))
  /** Page downloader (with proxy support, see [[createDownloader]]). */
  val downloader: AbstractDownloader = createDownloader(sc)
  /** Scheduler; falls back to the default priority-queue scheduler when unset. */
  val scheduler: Scheduler = createComponent(sc, spider, sc.components.scheduler)(schedulerType)
  /** Page handlers; may contain a null placeholder for a missing component (filtered in [[initSpider]]). */
  val handlers:Seq[AbstractHandler] = sc.components.handlers
    .map(comp => createComponent(sc, spider, Option(comp))(handlerType))
  /** Pipelines; may contain a null placeholder for a missing component (filtered in [[initSpider]]). */
  val pipelines:Seq[AbstractPipeline] = sc.components.pipelines
    .map(comp => createComponent(sc, spider, Option(comp))(pipelineType))

  initSpider()

  /**
    * Creates a spider component via reflection.
    *
    * Every component class is expected to expose a public
    * `(SpiderConf, Task, XmlProps)` constructor.
    *
    * @param conf spider configuration passed to the component constructor
    * @param task owning webmagic task (the spider itself)
    * @param componentOpt component descriptor; `None` selects a per-type default
    * @param compType component category used to pick the default for `None`
    * @return the instantiated component, or `null` for an absent handler/pipeline
    * @throws ConfException when no component is configured and no default exists
    */
  def createComponent[T](conf:SpiderConf, task:Task, componentOpt: Option[Component])(compType:Class[T]):T = {
    componentOpt match {
      case Some(Component(className, props)) =>
        val clazz:Class[_] = Class.forName(className)
        val constructor:Constructor[_] = clazz.getConstructor(classOf[SpiderConf], classOf[Task], classOf[XmlProps])
        constructor.newInstance(conf, task, props).asInstanceOf[T]
      case None =>
        compType match {
          case `schedulerType` =>
            // No scheduler configured: fall back to the default priority-queue scheduler.
            val queueClassName:String = "org.luosl.webmagicx.scheduler.PriorityQueueScheduler"
            val defSchedulerComp:Component = Component(queueClassName, XmlProps(queueClassName))
            createComponent(conf, task, Option(defSchedulerComp))(compType)
          case `handlerType` => null.asInstanceOf[T]
          case `pipelineType` => null.asInstanceOf[T]
          case other =>
            // Previously a bare scala.MatchError: e.g. a null url-creator entry
            // reaches here via Option(comp) == None. Fail with a clear config error.
            throw ConfException(s"no component configured and no default exists for type ${other.getName}")
        }
    }
  }

  /**
    * Creates the page downloader and, when proxies are configured, installs a
    * [[SimpleProxyProvider]] over them.
    *
    * @param sc spider configuration
    * @return the configured HttpClient-based downloader
    * @throws ConfException when a proxy entry has only one of user/password set
    */
  def createDownloader(sc:SpiderConf):AbstractDownloader = {
    val httpClientDownloader:HttpClientDownloader = new HttpClientDownloader
    if(sc.proxies.nonEmpty){
      val proxies:Seq[proxy.Proxy] = sc.proxies.map{
        case Proxy(host, port, None, None) => new webmagic.proxy.Proxy(host, port)
        // Empty credentials are treated the same as absent credentials.
        case Proxy(host, port, Some(""), Some("")) => new webmagic.proxy.Proxy(host, port)
        case Proxy(host, port, Some(user), Some(password)) => new webmagic.proxy.Proxy(host, port, user, password)
        case _ => throw ConfException("无效的代理配置")
      }
      val proxyProvider:SimpleProxyProvider = new SimpleProxyProvider(proxies.asJava)
      httpClientDownloader.setProxyProvider(proxyProvider)
    }
    httpClientDownloader
  }

  /**
    * Builds the initial requests from all start-url creators.
    * @return the seed requests for this spider
    */
  def startRequests():Seq[Request] = startUrlCreators.flatMap(_.requestsWithParam())

  /**
    * Wires downloader, scheduler, thread count, seed requests, handlers and
    * pipelines into the spider. Called once from the constructor.
    */
  def initSpider(): Unit ={
    spider.setDownloader(downloader)
      .setScheduler(scheduler)
      .setExitWhenComplete(true)
      .thread(sc.attribute.threadNum)
      .addRequest(startRequests():_*)
    // createComponent returns null for an absent handler/pipeline; never register those.
    handlers.filter(_ != null).foreach(spider.addHandler)
    pipelines.filter(_ != null).foreach(spider.addPipeline)
  }

  /** Starts the spider (asynchronously, in its own thread). */
  def start(): Unit = spider.start()

  /**
    * Restarts the spider: re-seeds the start requests, then starts it again.
    */
  def restart(): Unit = {
    spider.addRequest(startRequests():_*)
    start()
  }

  /** Stops the spider. */
  def stop(): Unit = spider.stop()

  /**
    * Current spider status.
    * @return the webmagic [[Spider.Status]]
    */
  def status():Spider.Status = spider.getStatus

}

/**
  * Spider component categories.
  *
  * Each value is the runtime `Class` token for one kind of pluggable component;
  * `SpiderCell.createComponent` matches on these tokens to decide which default
  * (if any) applies when no component is configured.
  */
object ComponentType{
  // Scheduler: decides which request the spider crawls next.
  val schedulerType:Class[Scheduler] = classOf[Scheduler]
  // Handler: page post-processing hook.
  val handlerType:Class[AbstractHandler] = classOf[AbstractHandler]
  // Pipeline: persistence / output stage for extracted results.
  val pipelineType:Class[AbstractPipeline] = classOf[AbstractPipeline]
  // Url creator: produces the spider's seed requests.
  val urlCreatorType:Class[AbstractUrlCreator] = classOf[AbstractUrlCreator]
}

